library(tigris)
## Warning: package 'tigris' was built under R version 3.5.1
## To enable 
## caching of data, set `options(tigris_use_cache = TRUE)` in your R script or .Rprofile.
## 
## Attaching package: 'tigris'
## The following object is masked from 'package:graphics':
## 
##     plot
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(leaflet)
## Warning: package 'leaflet' was built under R version 3.5.1
library(sp)
library(ggmap)
## Warning: package 'ggmap' was built under R version 3.5.1
## Loading required package: ggplot2
library(maptools)
## Checking rgeos availability: FALSE
##      Note: when rgeos is not available, polygon geometry     computations in maptools depend on gpclib,
##      which has a restricted licence. It is disabled by default;
##      to enable gpclib, type gpclibPermit()
library(broom)
library(httr)
library(rgdal)
## Warning: package 'rgdal' was built under R version 3.5.1
## rgdal: version: 1.3-3, (SVN revision 759)
##  Geospatial Data Abstraction Library extensions to R successfully loaded
##  Loaded GDAL runtime: GDAL 2.2.3, released 2017/11/20
##  Path to GDAL shared files: C:/Users/Peter Farquharson/Documents/R/win-library/3.5/sf/gdal
##  GDAL binary built with GEOS: TRUE 
##  Loaded PROJ.4 runtime: Rel. 4.9.3, 15 August 2016, [PJ_VERSION: 493]
##  Path to PROJ.4 shared files: C:/Users/Peter Farquharson/Documents/R/win-library/3.5/sf/proj
##  Linking to sp version: 1.3-1
library(tidyr)
library(tidyverse)
## -- Attaching packages ------------------------------------- tidyverse 1.2.1 --
## v tibble  1.4.2     v purrr   0.2.5
## v readr   1.1.1     v stringr 1.3.1
## v tibble  1.4.2     v forcats 0.3.0
## -- Conflicts ---------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(httr)
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
data <- read.csv("201407-citibike-tripdata.csv")
data <- mutate(data, ymd = as.Date(starttime))


##1.Create a data frame that has the unique name, latitude, and longitude for each Citibike station that was present in the system in July 2014

#df <- data.frame(data$start.station.name, data$start.station.latitude, data$start.station.longitude)
#df2 <- data.frame(data$end.station.name , data$end.station.latitude, data$end.station.longitude)

start_stations <- data %>% group_by(name = start.station.name, latitude = start.station.latitude, longitude = start.station.longitude) %>% summarize()

end_stations <- data %>%  group_by(name = end.station.name, latitude = end.station.latitude, longitude = end.station.longitude) %>% summarize()

all_stations <- merge(start_stations, end_stations)
#Make a map showing the location of each Citibike station using ggmap
#Do the same using leaflet, adding a popup that shows the name of the station when it's clicked on
citi_map <- get_map(location = c(lon = -74.04, lat = 40.72), maptype = "terrain",  zoom = 12) 
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=40.72,-74.04&zoom=12&size=640x640&scale=2&maptype=terrain&language=en-EN&sensor=false
ggmap(citi_map)+
geom_point(data = all_stations, aes(x = longitude, y = latitude))

leaflet() %>% addTiles() %>% setView(-74.045448, 40.726393, zoom = 12) %>% addProviderTiles("CartoDB.Positron") %>% addMarkers(~longitude, ~latitude , popup = ~name, data = all_stations)
#Then do a spatial join to combine this data frame with the Pediacities NYC neighborhood shapefile data
r <- GET('http://data.beta.nyc//dataset/0ff93d2d-90ba-457c-9f7e-39e47bf2ac5f/resource/35dd04fb-81b3-479b-a074-a27a37888ce7/download/d085e2f8d0b54d4590b1e7d1f35594c1pediacitiesnycneighborhoods.geojson')

nyc_neighborhoods <- readOGR(content(r,'text'), 'OGRGeoJSON', verbose = F)
## No encoding supplied: defaulting to UTF-8.
df_spatial <-data.frame(latitude = all_stations$latitude, longitude = all_stations$longitude)

points_spf <- df_spatial
coordinates(points_spf) <- ~longitude + latitude

proj4string(points_spf) <- proj4string(nyc_neighborhoods)
matches<- over(points_spf, nyc_neighborhoods)
df_spatial<- cbind(df_spatial, matches)
#Make a map showing the number of unique Citibike stations in each neighborhood
#First do this using ggmap where the fill color encodes the number of stations


neighbourhood_stations <- df_spatial %>% group_by(neighborhood) %>% summarize(num_stations = n())
#geojoin to join data frame into spatial data frame
map <- geo_join(nyc_neighborhoods, neighbourhood_stations, "neighborhood", "neighborhood")

pal <- colorNumeric(palette = "RdBu",
domain = range(map@data$num_stations, na.rm=T))

citi_visual <- tidy(nyc_neighborhoods, neighborhood = "neighborhood") %>% left_join(., neighbourhood_stations, by = c(id = "neighborhood"))
## Regions defined for each Polygons
## Warning: Column `id`/`neighborhood` joining character vector and factor,
## coercing into character vector
ggmap(citi_map) + geom_point(data = citi_visual, aes(x = long, y = lat, fill = num_stations))
## Warning: Removed 27503 rows containing missing values (geom_point).

#Then do the same using leaflet, adding a popup that shows the number of stations in a neighborhood when its shape is clicked on
points <- df_spatial %>% left_join(neighbourhood_stations)
## Joining, by = "neighborhood"
leaflet(map) %>% addTiles() %>% addPolygons(fillColor = ~pal(num_stations), popup = ~neighborhood) %>% addMarkers(~longitude, ~latitude, popup = ~num_stations, data = points) %>%  addProviderTiles("CartoDB.Positron") %>% setView(-74.045448, 40.726393, zoom = 10)
#Now create a new data frame that has the total number of trips that depart from each station at each hour of the day on July 14th

hour_trips <- data %>%filter(ymd == "2014-07-14") %>% mutate(hour = hour(starttime)) %>% group_by(name = start.station.name, hour, latitude = start.station.latitude, longitude= start.station.longitude) %>% summarize(num_trips = n()) %>% data.frame()

#Do a spatial join to combine this data frame with the Pediacities NYC neighborhood shapefile data

hour_trips_spd <- hour_trips
coordinates(hour_trips_spd) <- ~longitude + latitude
proj4string(hour_trips_spd) <- proj4string(nyc_neighborhoods)
matches <- over(hour_trips_spd, nyc_neighborhoods)
hour_trips <- cbind(hour_trips, matches)
#Make a ggmap plot showing the number of trips that leave from each neighborhood at 9am, 1pm, 5pm, and 10pm, faceted by hour, where each facet contains a map where the fill color encodes the number of departing trips in each neighborhood
neighborhood_trips <- hour_trips %>%filter(hour%in%c(9,13,17,22)) %>%
  group_by(neighborhood, hour) %>%   summarize(count = sum(num_trips))

pal2 <- colorNumeric(palette = "RdBu",domain = range(neighborhood_trips$count, na.rm = T))


citi_visual2 <- tidy(nyc_neighborhoods) %>% left_join(., neighborhood_trips, by = c(id = "neighborhood"))
## Regions defined for each Polygons
## Warning: Column `id`/`neighborhood` joining character vector and factor,
## coercing into character vector
ggmap(citi_map) + geom_polygon(data = citi_visual2, aes(x =  long, y = lat, fill = count)) + facet_wrap(~hour)